1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
13 Notes about the various classes of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 ----------------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 ----------------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the
22 rebase record emboss_e.NNN.
23 0->NoCut : the enzyme is not characterised.
24 2->OneCut : the enzyme produces one double strand cut.
25 4->TwoCuts : two double strand cuts.
26 ----------------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 ----------------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 ----------------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
37 Ov5, Ov3 Unknown is here for symmetry reasons and
38 correspond to enzymes that are not
39 characterised in rebase.
40 ----------------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised
44 in rebase.
45
46 Defined correspond to enzymes that display
47 a constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
61 note : these 3 classes refer to the overhang, not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its
63 restriction site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 ----------------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes
73 according to their suppliers to reduce the
74 quantity of results.
75 Also will allow the implementation of
76 buffer compatibility tables. Not
77 implemented yet.
78
79 the list of suppliers is extracted from
80 emboss_s.NNN
81 ----------------------------------------------------------------------------
82
83 """
84
85 from __future__ import print_function
86
87 import warnings
88
89 from Bio._py3k import zip
90 from Bio._py3k import filter
91 from Bio._py3k import range
92
93 import re
94 import itertools
95
96 from Bio.Seq import Seq, MutableSeq
97 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
98 from Bio.Restriction.Restriction_Dictionary import typedict
99 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
100 from Bio.Restriction.PrintFormat import PrintFormat
101 from Bio import BiopythonWarning
109 """Check characters in a string (PRIVATE).
110
111 Remove digits and white space present in string. Allows any valid ambiguous
112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
113
114 Other characters (e.g. symbols) trigger a TypeError.
115
116 Returns the string WITH A LEADING SPACE (!). This is for backwards
117 compatibility, and may in part be explained by the fact that
118 Bio.Restriction doesn't use zero based counting.
119 """
120
121 seq_string = "".join(seq_string.split()).upper()
122
123 for c in "0123456789":
124 seq_string = seq_string.replace(c, "")
125
126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
127 raise TypeError("Invalid character found in %s" % repr(seq_string))
128 return " " + seq_string
129
130
# Compatibility table between single-letter IUPAC DNA codes: each key is one
# code and its value is the string of codes treated as matching it.
# NOTE(review): the entries look like "every code sharing at least one concrete
# base with the key" (e.g. 'A' pairs with R, W, M, H, V, D and N) -- confirm
# against the code that consumes this table.
matching = {
    # Unambiguous bases.
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    # Two-base ambiguity codes.
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    # Three-base ambiguity codes.
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    # Any base.
    'N': 'ACBDGHKMNSRTWVY',
}
137
138 DNA = Seq
238
241 """RestrictionType. Type from which all enzyme classes are derived.
242
243 Implement the operator methods.
244 """
245
def __init__(cls, name='', bases=(), dct=None):
    """Validate the enzyme name and pre-compile its recognition pattern.

    Not intended to be called directly in normal operation.

    Arguments:
     - name  - name of the enzyme class; must not contain a hyphen.
     - bases - accepted for signature compatibility, not used here.
     - dct   - accepted for signature compatibility, not used here.

    If the class has a ``compsite`` attribute it is replaced by the result
    of ``re.compile`` on it. A class without that attribute is left
    untouched.

    Raises ValueError if the name contains a hyphen or if ``compsite``
    cannot be compiled.
    """
    if "-" in name:
        hyphen_message = ("Problem with hyphen in %s as enzyme name"
                          % repr(name))
        raise ValueError(hyphen_message)

    try:
        compiled_site = re.compile(cls.compsite)
        cls.compsite = compiled_site
    except AttributeError:
        # No compsite attribute to compile: nothing more to set up.
        return
    except Exception:
        regex_message = ("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
        raise ValueError(regex_message)
269
282
284 """Override '/' operator to use as search method.
285
286 >>> EcoRI/Seq('GAATTC')
287 [2]
288 Returns RE.search(other).
289 """
290 return cls.search(other)
291
293 """Override division with reversed operands to use as search method.
294
295 >>> Seq('GAATTC')/EcoRI
296 [2]
297 Returns RE.search(other).
298 """
299 return cls.search(other)
300
302 """Override Python 3 division operator to use as search method.
303
304 Like __div__.
305 """
306 return cls.search(other)
307
309 """As __truediv__, with reversed operands.
310
311 Like __rdiv__.
312 """
313 return cls.search(other)
314
316 """Override '//' operator to use as catalyse method.
317
318 >>> EcoRI//Seq('GAATTC')
319 (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
320 Returns RE.catalyse(other).
321 """
322 return cls.catalyse(other)
323
325 """As __floordiv__, with reversed operands.
326
327 >>> Seq('GAATTC')//EcoRI
328 (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))
329 Returns RE.catalyse(other).
330 """
331 return cls.catalyse(other)
332
334 """Return the name of the enzyme as string."""
335 return cls.__name__
336
338 """Implement repr method.
339
340 Used with eval or exec will instantiate the enzyme.
341 """
342 return "%s" % cls.__name__
343
345 """Return length of recognition site of enzyme as int."""
346 try:
347 return cls.size
348 except AttributeError:
349
350
351
352
353 return 0
354
356
357
358 return id(cls)
359
361 """Override '==' operator.
362
363 True if RE and other are the same enzyme.
364
365 Specifically this checks they are the same Python object.
366 """
367
368 return id(cls) == id(other)
369
371 """Override '!=' operator.
372
373 Isoschizomer strict (same recognition site, same restriction) -> False
374 All the other-> True
375
376 WARNING - This is not the inverse of the __eq__ method
377 >>> SacI != SstI # true isoschizomers
378 False
379 >>> SacI == SstI
380 False
381 """
382 if not isinstance(other, RestrictionType):
383 return True
384 elif cls.charac == other.charac:
385 return False
386 else:
387 return True
388
390 """Override '>>' operator to test for neoschizomers.
391
392 neoschizomer : same recognition site, different restriction. -> True
393 all the others : -> False
394 >>> SmaI >> XmaI
395 True
396 """
397 if not isinstance(other, RestrictionType):
398 return False
399 elif cls.site == other.site and cls.charac != other.charac:
400 return True
401 else:
402 return False
403
405 """Override '%' operator to test for compatible overhangs.
406
407 True if a and b have compatible overhang.
408 >>> XhoI % SalI
409 True
410 """
411 if not isinstance(other, RestrictionType):
412 raise TypeError(
413 'expected RestrictionType, got %s instead' % type(other))
414 return cls._mod1(other)
415
417 """Compare length of recognition site of two enzymes.
418
419 Override '>='. a is greater than or equal to b if a's site is longer
420 than b's site. If their sites have the same length, sort by alphabetical
421 order of their names.
422 >>> EcoRI.size
423 6
424 >>> EcoRV.size
425 6
426 >>> EcoRI >= EcoRV
427 False
428 """
429 if not isinstance(other, RestrictionType):
430 raise NotImplementedError
431 if len(cls) > len(other):
432 return True
433 elif cls.size == len(other) and cls.__name__ >= other.__name__:
434 return True
435 else:
436 return False
437
439 """Compare length of recognition site of two enzymes.
440
441 Override '>'. Sorting order:
442 1. size of the recognition site.
443 2. if equal size, alphabetical order of the names.
444
445 """
446 if not isinstance(other, RestrictionType):
447 raise NotImplementedError
448 if len(cls) > len(other):
449 return True
450 elif cls.size == len(other) and cls.__name__ > other.__name__:
451 return True
452 else:
453 return False
454
456 """Compare length of recognition site of two enzymes.
457
458 Override '<='. Sorting order:
459 1. size of the recognition site.
460 2. if equal size, alphabetical order of the names.
461
462 """
463 if not isinstance(other, RestrictionType):
464 raise NotImplementedError
465 elif len(cls) < len(other):
466 return True
467 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
468 return True
469 else:
470 return False
471
473 """Compare length of recognition site of two enzymes.
474
475 Override '<'. Sorting order:
476 1. size of the recognition site.
477 2. if equal size, alphabetical order of the names.
478
479 """
480 if not isinstance(other, RestrictionType):
481 raise NotImplementedError
482 elif len(cls) < len(other):
483 return True
484 elif len(cls) == len(other) and cls.__name__ < other.__name__:
485 return True
486 else:
487 return False
488
491 """Implement the methods that are common to all restriction enzymes.
492
493 All the methods are classmethod.
494
495 For internal use only. Not meant to be instantiated.
496 """
497
498 @classmethod
def search(cls, dna, linear=True):
    """Return a list of cutting sites of the enzyme in the sequence.

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance;
    a FormattedSeq is also accepted and is used as it is, in which case
    the linear argument is not consulted.

    If linear is False, the restriction sites that span over the boundaries
    will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    The sequence being searched is stored on the class as ``cls.dna``
    before the search is delegated to ``cls._search()``.
    """
    # Wrap plain sequences; an existing FormattedSeq is kept unchanged.
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
525
526 @classmethod
528 """Print all the suppliers of restriction enzyme."""
529 supply = sorted(x[0] for x in suppliers_dict.values())
530 print(",\n".join(supply))
531 return
532
533 @classmethod
535 """Test for real isoschizomer.
536
537 True if other is an isoschizomer of RE, but not a neoschizomer,
538 else False.
539
540 Equischizomer: same site, same position of restriction.
541 >>> SacI.is_equischizomer(SstI)
542 True
543 >>> SmaI.is_equischizomer(XmaI)
544 False
545
546 """
547 return not cls != other
548
549 @classmethod
551 """Test for neoschizomer.
552
553 True if other is a neoschizomer of RE, else False.
554 Neoschizomer: same site, different position of restriction.
555 """
556 return cls >> other
557
558 @classmethod
560 """Test for same recognition site.
561
562 True if other has the same recognition site, else False.
563
564 Isoschizomer: same site.
565 >>> SacI.is_isoschizomer(SstI)
566 True
567 >>> SmaI.is_isoschizomer(XmaI)
568 True
569
570 """
571 return (not cls != other) or cls >> other
572
573 @classmethod
575 """List equischizomers of the enzyme.
576
577 Return a sorted list of all the equischizomers of RE.
578 If batch is supplied it is used instead of the default AllEnzymes.
579
580 Equischizomer: same site, same position of restriction.
581 """
582 if not batch:
583 batch = AllEnzymes
584 r = [x for x in batch if not cls != x]
585 i = r.index(cls)
586 del r[i]
587 r.sort()
588 return r
589
590 @classmethod
592 """List neoschizomers of the enzyme.
593
594 Return a sorted list of all the neoschizomers of RE.
595 If batch is supplied it is used instead of the default AllEnzymes.
596
597 Neoschizomer: same site, different position of restriction.
598 """
599 if not batch:
600 batch = AllEnzymes
601 r = sorted(x for x in batch if cls >> x)
602 return r
603
604 @classmethod
606 """List all isoschizomers of the enzyme.
607
608 Return a sorted list of all the equischizomers and neoschizomers of RE.
609 If batch is supplied it is used instead of the default AllEnzymes.
610 """
611 if not batch:
612 batch = AllEnzymes
613 r = [x for x in batch if (cls >> x) or (not cls != x)]
614 i = r.index(cls)
615 del r[i]
616 r.sort()
617 return r
618
619 @classmethod
621 """Return the theoretical cutting frequency of the enzyme.
622
623 Frequency of the site, given as 'one cut per x bases' (int).
624 """
625 return cls.freq
626
627
628 -class NoCut(AbstractCut):
629 """Implement the methods specific to the enzymes that do not cut.
630
631 These enzymes are generally enzymes that have been only partially
632 characterised and the way they cut the DNA is unknown or enzymes for
633 which the pattern of cut is too complex to be recorded in Rebase
634 (ncuts values of 0 in emboss_e.###).
635
636 When using search() with these enzymes the values returned are at the start
637 of the restriction site.
638
639 Their catalyse() method raises a NotImplementedError.
640
641 Unknown and NotDefined are also part of the base classes of these enzymes.
642
643 Internal use only. Not meant to be instantiated.
644 """
645
646 @classmethod
648 """Return if the cutting pattern has one cut.
649
650 True if the enzyme cut the sequence one time on each strand.
651 """
652 return False
653
654 @classmethod
656 """Return if the cutting pattern has two cuts.
657
658 True if the enzyme cut the sequence twice on each strand.
659 """
660 return False
661
662 @classmethod
664 """Return a generator that moves the cutting position by 1 (PRIVATE).
665
666 For internal use only.
667
668 location is an integer corresponding to the location of the match for
669 the enzyme pattern in the sequence.
670 _modify returns the real place where the enzyme will cut.
671
672 Example::
673
674 EcoRI pattern : GAATTC
675 EcoRI will cut after the G.
676 so in the sequence:
677 ______
678 GAATACACGGAATTCGA
679 |
680 10
681 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
682 EcoRI cut after the G so:
683 EcoRI._modify(10) -> 11.
684
685 If the enzyme cut twice _modify will returns two integer corresponding
686 to each cutting site.
687 """
688 yield location
689
690 @classmethod
692 """Return a generator that moves the cutting position by 1 (PRIVATE).
693
694 For internal use only.
695
696 As _modify for site situated on the antiparallel strand when the
697 enzyme is not palindromic.
698 """
699 yield location
700
701 @classmethod
703 """Return a list of the enzyme's characteristics as tuple.
704
705 the tuple contains the attributes:
706 - fst5 -> first 5' cut (current strand) or None
707 - fst3 -> first 3' cut (complementary strand) or None
708 - scd5 -> second 5' cut (current strand) or None
709 - scd3 -> second 3' cut (complementary strand) or None
710 - site -> recognition site.
711
712 """
713 return None, None, None, None, cls.site
714
715
716 -class OneCut(AbstractCut):
717 """Implement the methods for enzymes that cut the DNA only once.
718
719 Correspond to ncuts values of 2 in emboss_e.###
720
721 Internal use only. Not meant to be instantiated.
722 """
723
724 @classmethod
726 """Return if the cutting pattern has one cut.
727
728 True if the enzyme cut the sequence one time on each strand.
729 """
730 return True
731
732 @classmethod
734 """Return if the cutting pattern has two cuts.
735
736 True if the enzyme cut the sequence twice on each strand.
737 """
738 return False
739
740 @classmethod
742 """Return a generator that moves the cutting position by 1 (PRIVATE).
743
744 For internal use only.
745
746 location is an integer corresponding to the location of the match for
747 the enzyme pattern in the sequence.
748 _modify returns the real place where the enzyme will cut.
749
750 Example::
751
752 EcoRI pattern : GAATTC
753 EcoRI will cut after the G.
754 so in the sequence:
755 ______
756 GAATACACGGAATTCGA
757 |
758 10
759 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
760 EcoRI cut after the G so:
761 EcoRI._modify(10) -> 11.
762
763 if the enzyme cut twice _modify will returns two integer corresponding
764 to each cutting site.
765 """
766 yield location + cls.fst5
767
768 @classmethod
770 """Return a generator that moves the cutting position by 1 (PRIVATE).
771
772 For internal use only.
773
774 As _modify for site situated on the antiparallel strand when the
775 enzyme is not palindromic
776 """
777 yield location - cls.fst3
778
779 @classmethod
781 """Return a list of the enzyme's characteristics as tuple.
782
783 The tuple contains the attributes:
784 - fst5 -> first 5' cut (current strand) or None
785 - fst3 -> first 3' cut (complementary strand) or None
786 - scd5 -> second 5' cut (current strand) or None
787 - scd3 -> second 3' cut (complementary strand) or None
788 - site -> recognition site.
789
790 """
791 return cls.fst5, cls.fst3, None, None, cls.site
792
795 """Implement the methods for enzymes that cut the DNA twice.
796
797 Correspond to ncuts values of 4 in emboss_e.###
798
799 Internal use only. Not meant to be instantiated.
800 """
801
802 @classmethod
804 """Return if the cutting pattern has one cut.
805
806 True if the enzyme cut the sequence one time on each strand.
807 """
808 return False
809
810 @classmethod
812 """Return if the cutting pattern has two cuts.
813
814 True if the enzyme cut the sequence twice on each strand.
815 """
816 return True
817
818 @classmethod
820 """Return a generator that moves the cutting position by 1 (PRIVATE).
821
822 For internal use only.
823
824 location is an integer corresponding to the location of the match for
825 the enzyme pattern in the sequence.
826 _modify returns the real place where the enzyme will cut.
827
828 example::
829
830 EcoRI pattern : GAATTC
831 EcoRI will cut after the G.
832 so in the sequence:
833 ______
834 GAATACACGGAATTCGA
835 |
836 10
837 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
838 EcoRI cut after the G so:
839 EcoRI._modify(10) -> 11.
840
841 if the enzyme cut twice _modify will returns two integer corresponding
842 to each cutting site.
843 """
844 yield location + cls.fst5
845 yield location + cls.scd5
846
847 @classmethod
849 """Return a generator that moves the cutting position by 1 (PRIVATE).
850
851 for internal use only.
852
853 as _modify for site situated on the antiparallel strand when the
854 enzyme is not palindromic
855 """
856 yield location - cls.fst3
857 yield location - cls.scd3
858
859 @classmethod
861 """Return a list of the enzyme's characteristics as tuple.
862
863 the tuple contains the attributes:
864 - fst5 -> first 5' cut (current strand) or None
865 - fst3 -> first 3' cut (complementary strand) or None
866 - scd5 -> second 5' cut (current strand) or None
867 - scd3 -> second 3' cut (complementary strand) or None
868 - site -> recognition site.
869
870 """
871 return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
872
875 """Implement the information about methylation.
876
877 Enzymes of this class possess a site which is methylable.
878 """
879
880 @classmethod
882 """Return if recognition site can be methylated.
883
884 True if the recognition site is methylable.
885 """
886 return True
887
890 """Implement information about methylation sensitivity.
891
892 Enzymes of this class are not sensitive to methylation.
893 """
894
895 @classmethod
897 """Return if recognition site can be methylated.
898
899 True if the recognition site is methylable.
900 """
901 return False
902
905 """Implement methods for enzymes with palindromic recognition sites.
906
907 palindromic means : the recognition site and its reverse complement are
908 identical.
909 Remarks : an enzyme with a site CGNNCG is palindromic even if some
910 of the sites that it will recognise are not.
911 for example here : CGAACG
912
913 Internal use only. Not meant to be instantiated.
914 """
915
916 @classmethod
918 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
919
920 For internal use only.
921
922 Implement the search method for palindromic enzymes.
923 """
924 siteloc = cls.dna.finditer(cls.compsite, cls.size)
925 cls.results = [r for s, g in siteloc for r in cls._modify(s)]
926 if cls.results:
927 cls._drop()
928 return cls.results
929
930 @classmethod
932 """Return if the enzyme has a palindromic recognition site."""
933 return True
934
937 """Implement methods for enzymes with non-palindromic recognition sites.
938
939 Palindromic means : the recognition site and its reverse complement are
940 identical.
941
942 Internal use only. Not meant to be instantiated.
943 """
944
945 @classmethod
947 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
948
949 For internal use only.
950
951 Implement the search method for non palindromic enzymes.
952 """
953 iterator = cls.dna.finditer(cls.compsite, cls.size)
954 cls.results = []
955 modif = cls._modify
956 revmodif = cls._rev_modify
957 s = str(cls)
958 cls.on_minus = []
959
960 for start, group in iterator:
961 if group(s):
962 cls.results += [r for r in modif(start)]
963 else:
964 cls.on_minus += [r for r in revmodif(start)]
965 cls.results += cls.on_minus
966
967 if cls.results:
968 cls.results.sort()
969 cls._drop()
970 return cls.results
971
972 @classmethod
974 """Return if the enzyme has a palindromic recognition site."""
975 return False
976
979 """Implement methods for enzymes that produce unknown overhangs.
980
981 These enzymes are also NotDefined and NoCut.
982
983 Internal use only. Not meant to be instantiated.
984 """
985
986 @classmethod
988 """List the sequence fragments after cutting dna with enzyme.
989
990 RE.catalyze(dna, linear=True) -> tuple of DNA.
991
992 Return a tuple of dna as will be produced by using RE to restrict the
993 dna.
994
995 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
996
997 If linear is False, the sequence is considered to be circular and the
998 output will be modified accordingly.
999 """
1000 raise NotImplementedError('%s restriction is unknown.'
1001 % cls.__name__)
1002 catalyze = catalyse
1003
1004 @classmethod
1006 """Return if the enzyme produces blunt ends.
1007
1008 True if the enzyme produces blunt end.
1009
1010 Related methods:
1011 - RE.is_3overhang()
1012 - RE.is_5overhang()
1013 - RE.is_unknown()
1014
1015 """
1016 return False
1017
1018 @classmethod
1020 """Return if the enzymes produces 5' overhanging ends.
1021
1022 True if the enzyme produces 5' overhang sticky end.
1023
1024 Related methods:
1025 - RE.is_3overhang()
1026 - RE.is_blunt()
1027 - RE.is_unknown()
1028
1029 """
1030 return False
1031
1032 @classmethod
1034 """Return if the enzyme produces 3' overhanging ends.
1035
1036 True if the enzyme produces 3' overhang sticky end.
1037
1038 Related methods:
1039 - RE.is_5overhang()
1040 - RE.is_blunt()
1041 - RE.is_unknown()
1042
1043 """
1044 return False
1045
1046 @classmethod
1048 """Return the type of the enzyme's overhang as string.
1049
1050 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1051 """
1052 return 'unknown'
1053
1054 @classmethod
1056 """List all enzymes that produce compatible ends for the enzyme."""
1057 return []
1058
1059 @classmethod
1061 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1062
1063 For internal use only.
1064
1065 Test for the compatibility of restriction ending of RE and other.
1066 """
1067 return False
1068
1069
1070 -class Blunt(AbstractCut):
1071 """Implement methods for enzymes that produce blunt ends.
1072
1073 The enzyme cuts the + strand and the - strand of the DNA at the same
1074 place.
1075
1076 Internal use only. Not meant to be instantiated.
1077 """
1078
1079 @classmethod
1081 """List the sequence fragments after cutting dna with enzyme.
1082
1083 RE.catalyze(dna, linear=True) -> tuple of DNA.
1084
1085 Return a tuple of dna as will be produced by using RE to restrict the
1086 dna.
1087
1088 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1089
1090 If linear is False, the sequence is considered to be circular and the
1091 output will be modified accordingly.
1092 """
1093 r = cls.search(dna, linear)
1094 d = cls.dna
1095 if not r:
1096 return d[1:],
1097 fragments = []
1098 length = len(r) - 1
1099 if d.is_linear():
1100
1101
1102
1103 fragments.append(d[1:r[0]])
1104 if length:
1105
1106
1107
1108 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1109
1110
1111
1112 fragments.append(d[r[-1]:])
1113 else:
1114
1115
1116
1117 fragments.append(d[r[-1]:] + d[1:r[0]])
1118 if not length:
1119
1120
1121
1122 return tuple(fragments)
1123
1124
1125
1126 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1127 return tuple(fragments)
1128 catalyze = catalyse
1129
1130 @classmethod
1132 """Return if the enzyme produces blunt ends.
1133
1134 True if the enzyme produces blunt end.
1135
1136 Related methods:
1137 - RE.is_3overhang()
1138 - RE.is_5overhang()
1139 - RE.is_unknown()
1140
1141 """
1142 return True
1143
1144 @classmethod
1146 """Return if the enzymes produces 5' overhanging ends.
1147
1148 True if the enzyme produces 5' overhang sticky end.
1149
1150 Related methods:
1151 - RE.is_3overhang()
1152 - RE.is_blunt()
1153 - RE.is_unknown()
1154
1155 """
1156 return False
1157
1158 @classmethod
1160 """Return if the enzyme produces 3' overhanging ends.
1161
1162 True if the enzyme produces 3' overhang sticky end.
1163
1164 Related methods:
1165 - RE.is_5overhang()
1166 - RE.is_blunt()
1167 - RE.is_unknown()
1168
1169 """
1170 return False
1171
1172 @classmethod
1174 """Return the type of the enzyme's overhang as string.
1175
1176 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1177 """
1178 return 'blunt'
1179
1180 @classmethod
1182 """List all enzymes that produce compatible ends for the enzyme."""
1183 if not batch:
1184 batch = AllEnzymes
1185 r = sorted(x for x in iter(AllEnzymes) if x.is_blunt())
1186 return r
1187
1188 @staticmethod
1190 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1191
1192 For internal use only
1193
1194 Test for the compatibility of restriction ending of RE and other.
1195 """
1196 return issubclass(other, Blunt)
1197
1198
1199 -class Ov5(AbstractCut):
1200 """Implement methods for enzymes that produce 5' overhanging ends.
1201
1202 The enzyme cuts the + strand after the - strand of the DNA.
1203
1204 Internal use only. Not meant to be instantiated.
1205 """
1206
1207 @classmethod
1209 """List the sequence fragments after cutting dna with enzyme.
1210
1211 RE.catalyze(dna, linear=True) -> tuple of DNA.
1212
1213 Return a tuple of dna as will be produced by using RE to restrict the
1214 dna.
1215
1216 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1217
1218 If linear is False, the sequence is considered to be circular and the
1219 output will be modified accordingly.
1220 """
1221 r = cls.search(dna, linear)
1222 d = cls.dna
1223 if not r:
1224 return d[1:],
1225 length = len(r) - 1
1226 fragments = []
1227 if d.is_linear():
1228
1229
1230
1231 fragments.append(d[1:r[0]])
1232 if length:
1233
1234
1235
1236 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1237
1238
1239
1240 fragments.append(d[r[-1]:])
1241 else:
1242
1243
1244
1245 fragments.append(d[r[-1]:] + d[1:r[0]])
1246 if not length:
1247
1248
1249
1250 return tuple(fragments)
1251
1252
1253
1254 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1255 return tuple(fragments)
1256 catalyze = catalyse
1257
1258 @classmethod
1260 """Return if the enzyme produces blunt ends.
1261
1262 True if the enzyme produces blunt end.
1263
1264 Related methods:
1265 - RE.is_3overhang()
1266 - RE.is_5overhang()
1267 - RE.is_unknown()
1268
1269 """
1270 return False
1271
1272 @classmethod
1274 """Return if the enzymes produces 5' overhanging ends.
1275
1276 True if the enzyme produces 5' overhang sticky end.
1277
1278 Related methods:
1279 - RE.is_3overhang()
1280 - RE.is_blunt()
1281 - RE.is_unknown()
1282
1283 """
1284 return True
1285
1286 @classmethod
1288 """Return if the enzyme produces 3' overhanging ends.
1289
1290 True if the enzyme produces 3' overhang sticky end.
1291
1292 Related methods:
1293 - RE.is_5overhang()
1294 - RE.is_blunt()
1295 - RE.is_unknown()
1296
1297 """
1298 return False
1299
1300 @classmethod
1302 """Return the type of the enzyme's overhang as string.
1303
1304 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1305 """
1306 return "5' overhang"
1307
1308 @classmethod
1310 """List all enzymes that produce compatible ends for the enzyme."""
1311 if not batch:
1312 batch = AllEnzymes
1313 r = sorted(x for x in iter(AllEnzymes) if x.is_5overhang() and
1314 x % cls)
1315 return r
1316
1317 @classmethod
1319 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1320
1321 For internal use only.
1322
1323 Test for the compatibility of restriction ending of RE and other.
1324 """
1325 if issubclass(other, Ov5):
1326 return cls._mod2(other)
1327 else:
1328 return False
1329
1330
1331 -class Ov3(AbstractCut):
1332 """Implement methods for enzymes that produce 3' overhanging ends.
1333
1334 The enzyme cuts the - strand after the + strand of the DNA.
1335
1336 Internal use only. Not meant to be instantiated.
1337 """
1338
1339 @classmethod
1341 """List the sequence fragments after cutting dna with enzyme.
1342
1343 RE.catalyze(dna, linear=True) -> tuple of DNA.
1344
1345 Return a tuple of dna as will be produced by using RE to restrict the
1346 dna.
1347
1348 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1349
1350 If linear is False, the sequence is considered to be circular and the
1351 output will be modified accordingly.
1352 """
1353 r = cls.search(dna, linear)
1354 d = cls.dna
1355 if not r:
1356 return d[1:],
1357 fragments = []
1358 length = len(r) - 1
1359 if d.is_linear():
1360
1361
1362
1363 fragments.append(d[1:r[0]])
1364 if length:
1365
1366
1367
1368 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1369
1370
1371
1372 fragments.append(d[r[-1]:])
1373 else:
1374
1375
1376
1377 fragments.append(d[r[-1]:] + d[1:r[0]])
1378 if not length:
1379
1380
1381
1382 return tuple(fragments)
1383
1384
1385
1386 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1387 return tuple(fragments)
1388 catalyze = catalyse
1389
1390 @classmethod
1392 """Return if the enzyme produces blunt ends.
1393
1394 True if the enzyme produces blunt end.
1395
1396 Related methods:
1397 - RE.is_3overhang()
1398 - RE.is_5overhang()
1399 - RE.is_unknown()
1400
1401 """
1402 return False
1403
1404 @classmethod
1406 """Return if the enzymes produces 5' overhanging ends.
1407
1408 True if the enzyme produces 5' overhang sticky end.
1409
1410 Related methods:
1411 - RE.is_3overhang()
1412 - RE.is_blunt()
1413 - RE.is_unknown()
1414
1415 """
1416 return False
1417
1418 @classmethod
1420 """Return if the enzyme produces 3' overhanging ends.
1421
1422 True if the enzyme produces 3' overhang sticky end.
1423
1424 Related methods:
1425 - RE.is_5overhang()
1426 - RE.is_blunt()
1427 - RE.is_unknown()
1428
1429 """
1430 return True
1431
1432 @classmethod
1434 """Return the type of the enzyme's overhang as string.
1435
1436 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1437 """
1438 return "3' overhang"
1439
@classmethod
def compatible_end(cls, batch=None):
    """List all enzymes that produce compatible ends for the enzyme.

    Arguments:
     - batch - iterable of enzymes to screen. When it is empty or not
       given, ``AllEnzymes`` is screened instead.

    Return a sorted list of the enzymes of the batch for which
    ``is_3overhang()`` is true and ``x % cls`` is true.
    """
    if not batch:
        batch = AllEnzymes
    # Screen the requested batch. The previous code always walked
    # AllEnzymes here, which made the ``batch`` argument a no-op.
    return sorted(x for x in batch if x.is_3overhang() and x % cls)
1448
1449 @classmethod
1451 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1452
1453 For internal use only.
1454
1455 Test for the compatibility of restriction ending of RE and other.
1456 """
1457
1458
1459
1460 if issubclass(other, Ov3):
1461 return cls._mod2(other)
1462 else:
1463 return False
1464
1467 """Implement methods for enzymes with defined recognition site and cut.
1468
1469 Typical example : EcoRI -> G^AATT_C
1470 The overhang will always be AATT
1471 Notes:
1472 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1473 Their overhang is always the same : blunt!
1474
1475 Internal use only. Not meant to be instantiated.
1476 """
1477
1478 @classmethod
1480 """Remove cuts that are outsite of the sequence (PRIVATE).
1481
1482 For internal use only.
1483
1484 Drop the site that are situated outside the sequence in linear
1485 sequence. Modify the index for site in circular sequences.
1486 """
1487
1488
1489
1490
1491
1492
1493
1494
1495 length = len(cls.dna)
1496 drop = itertools.dropwhile
1497 take = itertools.takewhile
1498 if cls.dna.is_linear():
1499 cls.results = [x for x in drop(lambda x:x <= 1, cls.results)]
1500 cls.results = [x for x in take(lambda x:x <= length, cls.results)]
1501 else:
1502 for index, location in enumerate(cls.results):
1503 if location < 1:
1504 cls.results[index] += length
1505 else:
1506 break
1507 for index, location in enumerate(cls.results[::-1]):
1508 if location > length:
1509 cls.results[-(index + 1)] -= length
1510 else:
1511 break
1512 return
1513
1514 @classmethod
1516 """Return if recognition sequence and cut are defined.
1517
1518 True if the sequence recognised and cut is constant,
1519 i.e. the recognition site is not degenerated AND the enzyme cut inside
1520 the site.
1521
1522 Related methods:
1523 - RE.is_ambiguous()
1524 - RE.is_unknown()
1525
1526 """
1527 return True
1528
1529 @classmethod
1531 """Return if recognition sequence and cut may be ambiguous.
1532
1533 True if the sequence recognised and cut is ambiguous,
1534 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1535 the site.
1536
1537 Related methods:
1538 - RE.is_defined()
1539 - RE.is_unknown()
1540
1541 """
1542 return False
1543
1544 @classmethod
1546 """Return if recognition sequence is unknown.
1547
1548 True if the sequence is unknown,
1549 i.e. the recognition site has not been characterised yet.
1550
1551 Related methods:
1552 - RE.is_defined()
1553 - RE.is_ambiguous()
1554
1555 """
1556 return False
1557
@classmethod
def elucidate(cls):
    """Return a string representing the recognition site and cuttings.

    The cut on the (+) strand is drawn as '^' and the cut on the
    (-) strand as '_', ie:

    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    """
    plus_cut = cls.fst5
    minus_cut = cls.fst3
    seq = cls.site
    if cls.cut_twice():
        return 'cut twice, not yet implemented sorry.'
    both_zero = plus_cut == minus_cut == 0
    if cls.is_5overhang():
        if both_zero:
            return 'N^' + seq + '_N'
        if minus_cut == 0:
            # The (-) strand cut falls just after the site.
            return seq[:plus_cut] + '^' + seq[plus_cut:] + '_N'
        return (seq[:plus_cut] + '^' + seq[plus_cut:minus_cut] + '_' +
                seq[minus_cut:])
    if cls.is_blunt():
        return seq[:plus_cut] + '^_' + seq[plus_cut:]
    # Remaining case: neither 5' overhang nor blunt.
    if both_zero:
        return 'N_' + seq + '^N'
    return (seq[:minus_cut] + '_' + seq[minus_cut:plus_cut] + '^' +
            seq[plus_cut:])
1597
@classmethod
def _mod2(cls, other):
    """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

    For internal use only.

    Identical overhang sequences are compatible. Otherwise, when other
    is a subclass of Ambiguous, the decision is delegated to
    ``other._mod2(cls)``. Anything else is not compatible.
    """
    if other.ovhgseq == cls.ovhgseq:
        return True
    if issubclass(other, Ambiguous):
        return other._mod2(cls)
    return False
1615
1618 """Implement methods for enzymes that produce variable overhangs.
1619
1620 Typical example : BstXI -> CCAN_NNNN^NTGG
1621 The overhang can be any sequence of 4 bases.
1622
1623 Notes:
1624 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1625 Their overhang is always the same : blunt!
1626
1627 Internal use only. Not meant to be instantiated.
1628
1629 """
1630
1631 @classmethod
1658
1659 @classmethod
1661 """Return if recognition sequence and cut are defined.
1662
1663 True if the sequence recognised and cut is constant,
1664 i.e. the recognition site is not degenerated AND the enzyme cut inside
1665 the site.
1666
1667 Related methods:
1668 - RE.is_ambiguous()
1669 - RE.is_unknown()
1670
1671 """
1672 return False
1673
1674 @classmethod
1676 """Return if recognition sequence and cut may be ambiguous.
1677
1678 True if the sequence recognised and cut is ambiguous,
1679 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1680 the site.
1681
1682 Related methods:
1683 - RE.is_defined()
1684 - RE.is_unknown()
1685
1686 """
1687 return True
1688
1689 @classmethod
1691 """Return if recognition sequence is unknown.
1692
1693 True if the sequence is unknown,
1694 i.e. the recognition site has not been characterised yet.
1695
1696 Related methods:
1697 - RE.is_defined()
1698 - RE.is_ambiguous()
1699
1700 """
1701 return False
1702
@classmethod
def _mod2(cls, other):
    """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

    For internal use only.

    Test for the compatibility of restriction ending of RE and other.
    The overhang of this enzyme is turned into a regular expression
    ('N' becomes '.', the codes in 'RYWMSKHDBV' become a character
    class taken from ``matching``) and matched against the overhang of
    other. Overhangs of different length are never compatible.
    """
    if len(cls.ovhgseq) != len(other.ovhgseq):
        return False
    # Build the pattern in a single pass over the original overhang.
    # Rewriting the growing pattern with split/join (as was done before)
    # could expand a code a second time inside an already inserted
    # character class.
    pieces = []
    for base in cls.ovhgseq:
        if base == 'N':
            pieces.append('.')
        elif base in 'RYWMSKHDBV':
            pieces.append('[' + matching[base] + ']')
        else:
            pieces.append(base)
    return bool(re.match(''.join(pieces), other.ovhgseq))
1730
@classmethod
def elucidate(cls):
    """Return a string representing the recognition site and cuttings.

    Return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    Positions outside the recognition site are drawn as 'N'.
    ie:

    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    Raise ValueError for a blunt cutter whose fst5 lies inside the site.
    """
    f5 = cls.fst5
    f3 = cls.fst3
    length = len(cls)
    site = cls.site
    # ``rep`` (not ``re``) so the regular expression module is not shadowed.
    if cls.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif cls.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Was "'N^' * abs(f5) * 'N'", which multiplies a str by a str
            # and raises TypeError; concatenate like the sibling branches.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^' + (length +
                                                      f3 - f5) * 'N' + '_N'
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif cls.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            raise ValueError('%s.easyrepr() : error f5=%i'
                             % (cls.name, f5))
    else:
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = 'N_' + abs(f3 - f5 + length) * 'N' + '^' + abs(f5) * 'N' \
                + site
        else:
            rep = 'N_' + abs(f3 + length) * 'N' + site + (f5 - length) * \
                'N' + '^N'
    return rep
1801
1804 """Implement methods for enzymes with non-characterized overhangs.
1805
1806 Correspond to NoCut and Unknown.
1807
1808 Internal use only. Not meant to be instantiated.
1809 """
1810
@classmethod
def _drop(cls):
    """Remove cuts that are outside of the sequence (PRIVATE).

    For internal use only.

    Nothing is done for a linear sequence. For a circular sequence the
    positions stored in ``cls.results`` are wrapped in place: leading
    positions below 1 are shifted up by the sequence length and
    trailing positions above the sequence length are shifted down by
    it.
    """
    if cls.dna.is_linear():
        return
    length = len(cls.dna)
    for index, location in enumerate(cls.results):
        if location < 1:
            cls.results[index] += length
        else:
            break
    # Walk the results from the end ([::-1]). The previous slice [:-1]
    # started again from the front, so positions past the end of the
    # sequence were never wrapped.
    for index, location in enumerate(cls.results[::-1]):
        if location > length:
            cls.results[-(index + 1)] -= length
        else:
            break
    return
1835
1836 @classmethod
1838 """Return if recognition sequence and cut are defined.
1839
1840 True if the sequence recognised and cut is constant,
1841 i.e. the recognition site is not degenerated AND the enzyme cut inside
1842 the site.
1843
1844 Related methods:
1845 - RE.is_ambiguous()
1846 - RE.is_unknown()
1847
1848 """
1849 return False
1850
1851 @classmethod
1853 """Return if recognition sequence and cut may be ambiguous.
1854
1855 True if the sequence recognised and cut is ambiguous,
1856 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1857 the site.
1858
1859 Related methods:
1860 - RE.is_defined()
1861 - RE.is_unknown()
1862
1863 """
1864 return False
1865
1866 @classmethod
1868 """Return if recognition sequence is unknown.
1869
1870 True if the sequence is unknown,
1871 i.e. the recognition site has not been characterised yet.
1872
1873 Related methods:
1874 - RE.is_defined()
1875 - RE.is_ambiguous()
1876
1877 """
1878 return True
1879
1880 @classmethod
1882 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1883
1884 For internal use only.
1885
1886 Test for the compatibility of restriction ending of RE and other.
1887 """
1888
1889
1890
1891
1892
1893
1894
1895 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1896 % (str(cls), str(other), str(cls)))
1897
1898 @classmethod
1900 """Return a string representing the recognition site and cuttings.
1901
1902 Return a representation of the site with the cut on the (+) strand
1903 represented as '^' and the cut on the (-) strand as '_'.
1904 ie:
1905
1906 >>> EcoRI.elucidate() # 5' overhang
1907 'G^AATT_C'
1908 >>> KpnI.elucidate() # 3' overhang
1909 'G_GTAC^C'
1910 >>> EcoRV.elucidate() # blunt
1911 'GAT^_ATC'
1912 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1913 '? GTATAC ?'
1914 >>>
1915
1916 """
1917 return '? %s ?' % cls.site
1918
1921 """Implement methods for enzymes which are commercially available.
1922
1923 Internal use only. Not meant to be instantiated.
1924 """
1925
1926
1927
1928
1929
1930
1931 @classmethod
1933 """Print a list of suppliers of the enzyme."""
1934 for s in cls.suppl:
1935 print(suppliers_dict[s][0] + ',')
1936 return
1937
1938 @classmethod
1940 """Return a list of suppliers of the enzyme."""
1941 return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl]
1942
1943 @classmethod
1945 """Return the recommended buffer of the supplier for this enzyme.
1946
1947 Not implemented yet.
1948 """
1949 return
1950
1951 @classmethod
1953 """Return if enzyme is commercially available.
1954
1955 True if RE has suppliers.
1956 """
1957 return True
1958
1961 """Implement methods for enzymes which are not commercially available.
1962
1963 Internal use only. Not meant to be instantiated.
1964 """
1965
1966 @staticmethod
1968 """Print a list of suppliers of the enzyme."""
1969 return None
1970
1971 @classmethod
1973 """Return a list of suppliers of the enzyme."""
1974 return []
1975
1976 @classmethod
1978 """Return the recommended buffer of the supplier for this enzyme.
1979
1980 Not implemented yet.
1981 """
1982 raise TypeError("Enzyme not commercially available.")
1983
1984 @classmethod
1986 """Return if enzyme is commercially available.
1987
1988 True if RE has suppliers.
1989 """
1990 return False
1991
2001 """Class for operations on more than one enzyme."""
2002
2003 - def __init__(self, first=(), suppliers=()):
2011
2013 if len(self) < 5:
2014 return '+'.join(self.elements())
2015 else:
2016 return '...'.join(('+'.join(self.elements()[:2]),
2017 '+'.join(self.elements()[-2:])))
2018
2020 return 'RestrictionBatch(%s)' % self.elements()
2021
2028
2030 """Override '/' operator to use as search method."""
2031 return self.search(other)
2032
2034 """Override division with reversed operands to use as search method."""
2035 return self.search(other)
2036
2038 """Override Python 3 division operator to use as search method.
2039
2040 Like __div__.
2041 """
2042 return self.search(other)
2043
2045 """As __truediv___, with reversed operands.
2046
2047 Like __rdiv__.
2048 """
2049 return self.search(other)
2050
def get(self, enzyme, add=False):
    """Check if enzyme is in batch and return it.

    The enzyme is first normalised with ``self.format``; any error
    raised there propagates to the caller.
    If the enzyme is not in the batch it is added when add is True,
    otherwise (the default) a ValueError is raised.
    """
    candidate = self.format(enzyme)
    if candidate not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % candidate.__name__)
        self.add(candidate)
    return candidate
2068
def lambdasplit(self, func):
    """Filter enzymes in batch with supplied function.

    The new batch will contain only the enzymes for which
    func return True.
    """
    kept = [x for x in self if func(x)]
    new = RestrictionBatch()
    # The batch stores its members as a set (see ``add``), so the
    # selection has to be inserted as set members; assigning ``_data``
    # alone left the returned batch empty.
    new.update(kept)
    # NOTE(review): ``_data`` is kept only for backward compatibility;
    # nothing visible here reads it - confirm before removing.
    new._data = dict(zip(kept, [True] * len(kept)))
    return new
2079
2081 """Add all enzymes from a given supplier to batch.
2082
2083 letter represents the suppliers as defined in the dictionary
2084 RestrictionDictionary.suppliers
2085 Returns None.
2086 Raise a KeyError if letter is not a supplier code.
2087 """
2088 supplier = suppliers_dict[letter]
2089 self.suppliers.append(letter)
2090 for x in supplier[1]:
2091 self.add_nocheck(eval(x))
2092 return
2093
2095 """List the current suppliers for the restriction batch.
2096
2097 Return a sorted list of the suppliers which have been used to
2098 create the batch.
2099 """
2100 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
2101 return suppl_list
2102
2104 """Override '+=' for use with sets.
2105
2106 b += other -> add other to b, check the type of other.
2107 """
2108 self.add(other)
2109 return self
2110
2112 """Overide '+' for use with sets.
2113
2114 b + other -> new RestrictionBatch.
2115 """
2116 new = self.__class__(self)
2117 new.add(other)
2118 return new
2119
2121 """Remove enzyme from restriction batch.
2122
2123 Safe set.remove method. Verify that other is a RestrictionType or can
2124 be evaluated to a RestrictionType.
2125 Raise a ValueError if other can not be evaluated to a RestrictionType.
2126 Raise a KeyError if other is not in B.
2127 """
2128 return set.remove(self, self.format(other))
2129
def add(self, other):
    """Add a restriction enzyme to the restriction batch.

    Safe set.add method: other is first passed through ``self.format``
    and the formatted value is what gets stored. Whatever
    ``self.format`` raises for an invalid enzyme propagates.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
2138
2140 """Add restriction enzyme to batch without checking its type."""
2141 return set.add(self, other)
2142
2160
2162 """Return if enzyme (name) is a known enzyme.
2163
2164 True if y or eval(y) is a RestrictionType.
2165 """
2166 return (isinstance(y, RestrictionType) or
2167 isinstance(eval(str(y)), RestrictionType))
2168
def split(self, *classes, **bool):
    """Extract enzymes of a certain class and put in new RestrictionBatch.

    B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    An enzyme is selected when, for every class given, it is a subclass
    of that class if the keyword named after the class is true (the
    default) and is not a subclass of it if the keyword is false.

    It works but it is slow, so it has really an interest when splitting
    over multiple conditions.
    """
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            if issubclass(element, klass):
                if not wanted:
                    return False
            elif wanted:
                return False
        return True

    selected = [k for k in self if splittest(k)]
    new = RestrictionBatch()
    # The batch stores its members as a set (see ``add``), so the
    # selection has to be inserted as set members; assigning ``_data``
    # alone left the returned batch empty.
    new.update(selected)
    # NOTE(review): ``_data`` is kept only for backward compatibility;
    # nothing visible here reads it - confirm before removing.
    new._data = dict(zip(selected, [True] * len(selected)))
    return new
2194
2196 """List the enzymes of the RestrictionBatch as list of strings.
2197
2198 Give all the names of the enzymes in B sorted alphabetically.
2199 """
2200 return sorted(str(e) for e in self)
2201
2203 """List the names of the enzymes of the RestrictionBatch.
2204
2205 Return a list of the name of the elements of the batch.
2206 """
2207 return [str(e) for e in self]
2208
2209 @classmethod
2211 """Return a dicionary with supplier codes.
2212
2213 Letter code for the suppliers.
2214 """
2215 supply = dict((k, v[0]) for k, v in suppliers_dict.items())
2216 return supply
2217
2218 @classmethod
2220 """Print a list of supplier codes."""
2221 supply = [' = '.join(i) for i in cls.suppl_codes().items()]
2222 print('\n'.join(supply))
2223 return
2224
def search(self, dna, linear=True):
    """Return a dic of cutting sites in the seq for the batch enzymes.

    dna may be a DNA instance (it is then wrapped in a FormattedSeq
    using linear) or a FormattedSeq (its own ``linear`` attribute is
    used). The last (sequence string, linear) pair searched is
    remembered in ``self.already_mapped``; searching the same pair
    again returns the stored ``self.mapping`` without recomputing it.

    Raise TypeError for any other type of dna.
    """
    # Instances that never went through __init__ may lack the marker.
    if not hasattr(self, "already_mapped"):
        self.already_mapped = None
    if isinstance(dna, DNA):
        signature = (str(dna), linear)
        if signature == self.already_mapped:
            return self.mapping
        self.already_mapped = signature
        target = FormattedSeq(dna, linear)
    elif isinstance(dna, FormattedSeq):
        signature = (str(dna), dna.linear)
        if signature == self.already_mapped:
            return self.mapping
        self.already_mapped = signature
        target = dna
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    self.mapping = dict((enzyme, enzyme.search(target)) for enzyme in self)
    return self.mapping
2256
2257
2258
2259
2260
2261
2262
2263
2264 -class Analysis(RestrictionBatch, PrintFormat):
2265 """Provide methods for enhanced analysis and pretty printing."""
2266
2269 """Initialize an Analysis with RestrictionBatch and sequence.
2270
2271 Analysis([restrictionbatch [, sequence] linear=True])
2272 -> New Analysis class.
2273
2274 For most of the methods of this class if a dictionary is given it will
2275 be used as the base to calculate the results.
2276 If no dictionary is given a new analysis using the RestrictionBatch
2277 which has been given when the Analysis class has been instantiated,
2278 will be carried out and used.
2279 """
2280 RestrictionBatch.__init__(self, restrictionbatch)
2281 self.rb = restrictionbatch
2282 self.sequence = sequence
2283 self.linear = linear
2284 if self.sequence:
2285 self.search(self.sequence, self.linear)
2286
2288 return 'Analysis(%s,%s,%s)' %\
2289 (repr(self.rb), repr(self.sequence), self.linear)
2290
2292 """Filter result for keys which are in wanted (PRIVATE).
2293
2294 A._sub_set(other_set) -> dict.
2295
2296 Internal use only.
2297
2298 Screen the results through wanted set.
2299 Keep only the results for which the enzymes is in wanted set.
2300 """
2301
2302 return dict((k, v) for k, v in self.mapping.items() if k in wanted)
2303
def _boundaries(self, start, end):
    """Set boundaries to correct values (PRIVATE).

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    Values below 1 are shifted up by the length of the sequence, then
    the two values are put in ascending order.

    Return a tuple (start, end, test) where test is
    ``self._test_normal``. Raise TypeError if start or end is not an
    int.
    """
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    if start < 1:
        start += len(self.sequence)
    if end < 1:
        end += len(self.sequence)
    if start > end:
        start, end = end, start
    # Always return the triple. Equal boundaries used to fall through
    # and return None, which broke every caller unpacking three values;
    # they now describe an empty window.
    return start, end, self._test_normal
2324
2326 """Test if site is between start and end (PRIVATE).
2327
2328 Internal use only
2329 """
2330 return start <= site < end
2331
2333 """Test if site is between end and start, for circular sequences (PRIVATE).
2334
2335 Internal use only.
2336 """
2337 return start <= site <= len(self.sequence) or 1 <= site < end
2338
2349
2350 - def print_that(self, dct=None, title='', s1=''):
2351 """Print the output of the analysis.
2352
2353 A.print_that([dct[, title[, s1[,print_]]]]) -> print the results
2354 from dct.
2355
2356 If dct is not given the full dictionary is used.
2357 s1: Title for non-cutting enzymes
2358 This method prints the output of A.format_output() and it is here
2359 for backwards compatibility.
2360 """
2361 print(self.format_output(dct, title, s1))
2362
def change(self, **what):
    """Change parameters of print output.

    `A.change(**attribute_name)` -> Change attribute of Analysis.

    Recognised keywords:
     - NameWidth, ConsoleWidth - set, then Cmodulo and PrefWidth are
       recomputed from them.
     - sequence, linear - set, then the search is run again.
     - rb - the analysis is re-initialised with the new batch.
     - Indent, Maxsize - simply set.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Raise AttributeError for Cmodulo, PrefWidth and unknown keywords.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # Re-initialise in place. __init__ returns None, so its
            # result must not be bound to ``self`` (that broke every
            # keyword handled after 'rb').
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2398
2399 - def full(self, linear=True):
2400 """Perform analysis with all enzymes of batch and return all results.
2401
2402 A.full() -> dict.
2403
2404 Full Restriction Map of the sequence.
2405 """
2406 return self.mapping
2407
def blunt(self, dct=None):
    """Return only cuts that have blunt ends.

    The entries of dct whose enzyme answers true to ``is_blunt()`` are
    returned as a new dict. ``self.mapping`` is used when dct is not
    given or is empty.
    """
    source = dct if dct else self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in source.items()
                if enzyme.is_blunt())
2413
2415 """Return only cuts that have 5' overhangs."""
2416 if not dct:
2417 dct = self.mapping
2418 return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2419
2421 """Return only cuts that have 3' overhangs."""
2422 if not dct:
2423 dct = self.mapping
2424 return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2425
2427 """Return only results from enzymes that produce defined overhangs."""
2428 if not dct:
2429 dct = self.mapping
2430 return dict((k, v) for k, v in dct.items() if k.is_defined())
2431
2433 """Return only results from enzyme with at least one cut."""
2434 if not dct:
2435 dct = self.mapping
2436 return dict((k, v) for k, v in dct.items() if v)
2437
2439 """Return only results from enzymes that don't cut the sequence."""
2440 if not dct:
2441 dct = self.mapping
2442 return dict((k, v) for k, v in dct.items() if not v)
2443
2445 """Return only results from enzymes that cut the sequence N times."""
2446 if not dct:
2447 dct = self.mapping
2448 return dict((k, v) for k, v in dct.items()if len(v) == N)
2449
2451 """Return only results from enzymes that cut (x,y,z,...) times."""
2452 if not dct:
2453 dct = self.mapping
2454 return dict((k, v) for k, v in dct.items() if len(v) in list)
2455
def with_name(self, names, dct=None):
    """Return only results from enzymes which names are listed.

    Entries of names that are not in AllEnzymes are reported with a
    BiopythonWarning and ignored. The names argument itself is left
    untouched.

    If dct is not given (or is empty) a new search of the sequence is
    run with the remaining enzymes; otherwise the matching entries of
    dct are returned.
    """
    # Build a filtered copy instead of deleting from ``names`` while
    # enumerating it, which skipped the entry following each deletion.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    if not dct:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict((n, dct[n]) for n in known if n in dct)
2466
def with_site_size(self, site_size, dct=None):
    """Return only results from enzymes with a given site size.

    The enzymes of the batch whose ``size`` equals site_size are
    selected. If dct is not given (or is empty) a new search of the
    sequence is run with them; otherwise the entries of dct belonging
    to the selected enzymes are returned.
    """
    sites = [name for name in self if name.size == site_size]
    if not dct:
        # Honour the topology of the analysis, as with_name does.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Filter on the selected enzymes; testing ``k in site_size`` tried
    # membership in an int and raised TypeError.
    return dict((k, v) for k, v in dct.items() if k in sites)
2473
2475 """Return only results from enzymes that only cut within start, end."""
2476 start, end, test = self._boundaries(start, end)
2477 if not dct:
2478 dct = self.mapping
2479 d = dict(dct)
2480 for key, sites in dct.items():
2481 if not sites:
2482 del d[key]
2483 continue
2484 for site in sites:
2485 if test(start, end, site):
2486 continue
2487 else:
2488 del d[key]
2489 break
2490 return d
2491
def between(self, start, end, dct=None):
    """Return only results from enzymes that cut at least within borders.

    Enzymes that cut the sequence at least once in between start and
    end are kept, with all their cutting positions; they may cut
    outside as well. ``self.mapping`` is used when dct is not given or
    is empty.
    """
    start, end, test = self._boundaries(start, end)
    if not dct:
        dct = self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in dct.items()
                if any(test(start, end, cut) for cut in cuts))
2509
2511 """Return only results from within start, end.
2512
2513 Enzymes must cut inside start/end and may also cut outside. However,
2514 only the cutting positions within start/end will be returned.
2515 """
2516 d = []
2517 if start <= end:
2518 d = [(k, [vv for vv in v if start <= vv <= end])
2519 for k, v in self.between(start, end, dct).items()]
2520 else:
2521 d = [(k, [vv for vv in v if start <= vv or vv <= end])
2522 for k, v in self.between(start, end, dct).items()]
2523 return dict(d)
2524
def only_outside(self, start, end, dct=None):
    """Return only results from enzymes that only cut outside start, end.

    An enzyme is kept when it cuts at least once and none of its
    cutting positions lies in between start and end.
    ``self.mapping`` is used when dct is not given or is empty.
    """
    start, end, test = self._boundaries(start, end)
    if not dct:
        dct = self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in dct.items()
                if cuts and not any(test(start, end, cut) for cut in cuts))
2546
def outside(self, start, end, dct=None):
    """Return only results from enzymes that at least cut outside borders.

    An enzyme is kept, with all its cutting positions, when at least
    one of them does not lie in between start and end; it may cut
    inside as well. ``self.mapping`` is used when dct is not given or
    is empty.
    """
    start, end, test = self._boundaries(start, end)
    if not dct:
        dct = self.mapping
    return dict((enzyme, cuts) for enzyme, cuts in dct.items()
                if any(not test(start, end, cut) for cut in cuts))
2565
def do_not_cut(self, start, end, dct=None):
    """Return only results from enzymes that don't cut between borders.

    The result merges the output of ``self.without_site`` and of
    ``self.only_outside(start, end, ...)``, both computed on dct.
    ``self.mapping`` is used when dct is not given or is empty.
    """
    if not dct:
        dct = self.mapping
    # Pass dct on: calling without_site() with no argument took the
    # non-cutters from the full mapping even when a dct was supplied.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597 CommOnly = RestrictionBatch()
2598 NonComm = RestrictionBatch()
2599 for TYPE, (bases, enzymes) in typedict.items():
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617 bases = tuple(eval(x) for x in bases)
2618
2619
2620
2621
2622 T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
2623 for k in enzymes:
2624
2625
2626
2627
2628
2629 newenz = T(k, bases, enzymedict[k])
2630
2631
2632
2633
2634
2635 if newenz.is_comm():
2636 CommOnly.add_nocheck(newenz)
2637 else:
2638 NonComm.add_nocheck(newenz)
2639
2640
2641
2642 AllEnzymes = RestrictionBatch(CommOnly)
2643 AllEnzymes.update(NonComm)
2644
2645
2646
2647 names = [str(x) for x in AllEnzymes]
2648 try:
2649 del x
2650 except NameError:
2651
2652 pass
2653 locals().update(dict(zip(names, AllEnzymes)))
2654 __all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
2655 'CommOnly', 'NonComm') + tuple(names)
2656 del k, enzymes, TYPE, bases, names
2657